# SET DIRECTORY TO SOURCE FILE 
# The relative paths in this script assume that, when it starts, the working
# directory is the folder containing this file. setwd() then moves two levels
# up, and every later 'data/...' and 'tables/...' path is resolved from there.

setwd("../../")

# From here on console output is diverted into the log file; the diversion is
# ended by the bare sink() call that follows the table further down.
sink("tables/table2a_log.txt") #create log 

library(plyr)
library(tidyverse)
library(groundhog)
groundhog.library('apsrtable','2019-01-01',tolerate.R.version='4.1.2')

# Respondent-level survey data (wide format).
dat <- read.csv('data/cleaned/heuristics_cleaned_wide.csv', stringsAsFactors = FALSE)

# 0/1 party indicators; a missing input yields NA.
#   participant_republican: respondent's pid is above 4
#   MC_republican:          the representative's party letter is 'R'
dat$participant_republican <- as.numeric(dat$pid > 4)
dat$MC_republican <- as.numeric(dat$reppartyoneletter == 'R')

# TRUE when the two indicators above coincide
dat$participant_MC_party_match <- dat$participant_republican == dat$MC_republican
# sanity check (disabled):
#summary(lm(mcratingscale ~ participant_MC_party_match, dat))

# Data at the level of votes/SIGs potentially shown.
# Each row pairs a SIG (sig_id) with a House vote (house_vote_id).
votes <- read.csv('data/Ancillary Data/vote_and_group_info_map.csv', stringsAsFactors = FALSE)

# Supportiveness (0/1) of the SIG rating identified by randomrating_sig_id for
# each respondent; filled in one SIG at a time inside the loop below.
dat$sig_rating_supportive_vote_shown <- NA

# Iterate over votes to create new columns in dat for each vote possibly shown.
# seq_len() is used instead of 1:nrow() so an empty table means zero iterations
# (1:0 would iterate over c(1, 0) and index non-existent rows).
for (i in seq_len(nrow(votes))) {
  sig_id <- votes$sig_id[i]
  house_vote_id <- votes$house_vote_id[i]

  # Names of the dat columns that belong to this SIG / this House vote
  rating_col <- paste0('reprating', sig_id)
  supportive_col <- paste0('sig_rating_supportive_', sig_id)
  actual_vote_col <- paste0('repactualvote', house_vote_id)
  own_view_col <- paste0('ownview', house_vote_id)
  perceived_vote_col <- paste0('repvoteperc', house_vote_id)

  # If house vote is missing, need to set rating to NA since it would not be
  # used as a treatment. (Ok to leave house vote intact if SIG rating is
  # missing here.)
  dat[is.na(dat[, actual_vote_col]), rating_col] <- NA

  # Did the SIG give a positive or negative rating to the MC?
  # Ratings below 50 are coded 0, all others 1; NA ratings stay NA.
  dat[, supportive_col] <- ifelse(dat[, rating_col] < 50, 0, 1)

  # thinks_rep_agrees: respondent's own view equals the vote they perceive
  # their rep to have cast on this issue (1/0, NA if either is missing)
  dat[, paste0('thinks_rep_agrees_', house_vote_id)] <-
    as.numeric(dat[, own_view_col] == dat[, perceived_vote_col])

  # As a control: respondent's own view equals the rep's actual vote
  dat[, paste0('rep_actually_agrees_', house_vote_id)] <-
    as.numeric(dat[, own_view_col] == dat[, actual_vote_col])

  # If this was the vote that was shown, load this vote into the proper variable
  this.vote.shown <- which(dat$randomrating_sig_id == sig_id)
  dat$sig_rating_supportive_vote_shown[this.vote.shown] <-
    dat[this.vote.shown, supportive_col]
}

# Control for average values of treatment within version of survey shown (a or b)

# Initialise explicitly so the column exists with a defined missing value for
# respondents who fall in neither survey version.
dat$sig_rating_supportive_avg <- NA_real_

for (version in c('a', 'b')) {
  # SIGs whose ratings could have been shown in this survey version
  sig.ids <- votes$sig_id[votes$split == version]

  # which() turns the comparison into row positions, so a missing
  # survey_version is simply skipped instead of producing an NA subscript.
  in.version <- which(dat$survey_version == version)

  # Average of whether SIG ratings (that could have been shown) were supportive.
  # drop = FALSE keeps a data frame even when only one SIG column is selected,
  # which rowMeans() requires.
  dat$sig_rating_supportive_avg[in.version] <-
    rowMeans(dat[in.version, paste0('sig_rating_supportive_', sig.ids), drop = FALSE],
             na.rm = TRUE)
}

# DV = whether person thinks they agree with their MC, aggregated over issues.
# Columns are selected by name, and drop = FALSE keeps a data frame even if
# only one issue column exists (rowMeans/rowSums need two dimensions).
thinks.cols <- names(dat)[startsWith(names(dat), 'thinks_rep_agrees_')]
actual.cols <- names(dat)[startsWith(names(dat), 'rep_actually_agrees_')]
thinks.answers <- dat[, thinks.cols, drop = FALSE]

# Number of issues with a non-missing thinks_rep_agrees value per respondent
n.answered <- rowSums(!is.na(thinks.answers))

# Despite its name, this is mean * count, i.e. the NUMBER of issues on which
# the respondent thinks the rep agrees. It is NaN (not 0) when no issue was
# answered, because the mean of zero values is NaN.
dat$mean_thinks_rep_agrees <- rowMeans(thinks.answers, na.rm = TRUE) * n.answered
dat$num_issue_qs <- n.answered

# Control = whether they actually agree, scaled the same way.
# NOTE(review): the multiplier is the count of non-missing thinks_rep_agrees_*
# values, not of rep_actually_agrees_* values; confirm the two always coincide.
dat$actually_agrees_w_rep <-
  rowMeans(dat[, actual.cols, drop = FALSE], na.rm = TRUE) * dat$num_issue_qs

# Treatment = 0.5 if no rating shown, 0 if bad rating shown, 1 if good rating shown
# Rows where ratingsshown is set but the shown rating's supportiveness is
# missing keep the 0.5 default, because the NA comparison selects nothing.
dat$treat_rating_shown <- 0.5

good.rating.shown <- which(dat$ratingsshown & dat$sig_rating_supportive_vote_shown == 1)
bad.rating.shown <- which(dat$ratingsshown & dat$sig_rating_supportive_vote_shown == 0)
dat$treat_rating_shown[good.rating.shown] <- 1
dat$treat_rating_shown[bad.rating.shown] <- 0

# Sanity check. Printed explicitly: a bare top-level expression is not
# auto-printed when the script is run through source(), so it would otherwise
# be missing from the log.
print(table(dat$treat_rating_shown, dat$ratingsshown))

# Include PID as a control for precision.
# PID_agree re-orients pid relative to the MC: it equals pid when the MC is
# coded Republican and 8 - pid when MC_republican is 0, so higher values mean
# the respondent sits closer to the MC's side of the scale.
dat$PID_agree <- dat$pid # more positive values = respondent is more republican
mc.not.republican <- dat$MC_republican == 0
dat$PID_agree[mc.not.republican] <- 8 - dat$pid[mc.not.republican]


# Main specification: linear effect of the treatment (0 / 0.5 / 1), controlling
# for pid interacted with the MC's party, actual agreement, survey version, and
# dummies for each value of the average supportiveness of the ratings that
# could have been shown.
linear.lm <- lm(
  mean_thinks_rep_agrees ~
    treat_rating_shown +
    pid*MC_republican + actually_agrees_w_rep + survey_version +
    factor(sig_rating_supportive_avg),
  dat
)

# Same outcome with separate indicators for a bad (0) and a good (1) rating
# shown, with no rating shown (0.5) as the omitted category; PID_agree replaces
# the pid x party interaction.
dummies.lm <- lm(
  mean_thinks_rep_agrees ~
    (treat_rating_shown == 0) +
    (treat_rating_shown == 1) +
    PID_agree + actually_agrees_w_rep + survey_version +
    factor(sig_rating_supportive_avg),
  dat
)

# Main specification re-estimated on sub-samples. update() re-runs the call
# stored in linear.lm with only the subset argument added.
# PID_agree > 4: respondent on the same side as the MC; PID_agree < 4: opposite
# side. Respondents with PID_agree == 4 are in neither sub-sample.
linear.lm.same.party <- update(linear.lm, subset = PID_agree > 4)
linear.lm.diff.party <- update(linear.lm, subset = PID_agree < 4)

#Table 2(a)
# Columns: full sample, same-party respondents, different-party respondents.
# Coefficients whose names contain "factor" are omitted from the output.
# The table is printed explicitly so it reaches the log even when the script is
# run through source(), where top-level values are not auto-printed;
# invisible() guarantees it is never printed a second time.
invisible(print(
  apsrtable::apsrtable(linear.lm,
                       linear.lm.same.party, linear.lm.diff.party,
                       digits = 3, stars = 'default',
                       omitcoef = expression(grep("factor",coefnames)))
))
  #We only show the "treat\_rating\_shown" and "actually\_agrees\_w\_rep"
  #coefficients

# Stop diverting output to the log file opened at the top of the script
sink()

#result mentioned in text without attitudinal controls
# linearnc.lm <- lm(mean_thinks_rep_agrees ~
#                   treat_rating_shown +
#                   survey_version +
#                   factor(sig_rating_supportive_avg),
#                 dat)
#summary(linearnc.lm)